Preparing the data
First we need to import the data and clean up any issues
Read in the file
df <- read_csv(here::here("data", "week11", "co2_global_emissions.csv"))
Take a look at the data
I use the str function to inspect the structure of the dataframe. There are two columns for the inidicator which in this case are all the same and can be removed. It also makes sense to convert the table to a long format instead of having each year represented by its own column.
str(df)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 264 obs. of 63 variables:
## $ Country Name : chr "Aruba" "Afghanistan" "Angola" "Albania" ...
## $ Country Code : chr "ABW" "AFG" "AGO" "ALB" ...
## $ Indicator Name: chr "CO2 emissions (metric tons per capita)" "CO2 emissions (metric tons per capita)" "CO2 emissions (metric tons per capita)" "CO2 emissions (metric tons per capita)" ...
## $ Indicator Code: chr "EN.ATM.CO2E.PC" "EN.ATM.CO2E.PC" "EN.ATM.CO2E.PC" "EN.ATM.CO2E.PC" ...
## $ 1960 : num NA 0.0461 0.0975 1.2582 NA ...
## $ 1961 : num NA 0.0536 0.079 1.3742 NA ...
## $ 1962 : num NA 0.0738 0.2013 1.44 NA ...
## $ 1963 : num NA 0.0742 0.1925 1.1817 NA ...
## $ 1964 : num NA 0.0863 0.201 1.1117 NA ...
## $ 1965 : num NA 0.101 0.192 1.166 NA ...
## $ 1966 : num NA 0.108 0.246 1.333 NA ...
## $ 1967 : num NA 0.124 0.155 1.364 NA ...
## $ 1968 : num NA 0.115 0.256 1.52 NA ...
## $ 1969 : num NA 0.0868 0.4196 1.559 NA ...
## $ 1970 : num NA 0.15 0.529 1.753 NA ...
## $ 1971 : num NA 0.166 0.492 1.989 NA ...
## $ 1972 : num NA 0.131 0.635 2.516 NA ...
## $ 1973 : num NA 0.136 0.671 2.304 NA ...
## $ 1974 : num NA 0.156 0.652 1.849 NA ...
## $ 1975 : num NA 0.169 0.575 1.911 NA ...
## $ 1976 : num NA 0.155 0.416 2.014 NA ...
## $ 1977 : num NA 0.183 0.435 2.276 NA ...
## $ 1978 : num NA 0.163 0.646 2.531 NA ...
## $ 1979 : num NA 0.168 0.637 2.898 NA ...
## $ 1980 : num NA 0.133 0.599 1.935 NA ...
## $ 1981 : num NA 0.152 0.571 2.693 NA ...
## $ 1982 : num NA 0.165 0.485 2.625 NA ...
## $ 1983 : num NA 0.204 0.515 2.683 NA ...
## $ 1984 : num NA 0.235 0.487 2.694 NA ...
## $ 1985 : num NA 0.298 0.443 2.658 NA ...
## $ 1986 : num 2.868 0.271 0.427 2.665 NA ...
## $ 1987 : num 7.235 0.272 0.518 2.414 NA ...
## $ 1988 : num 10.026 0.248 0.446 2.332 NA ...
## $ 1989 : num 10.635 0.236 0.424 2.783 NA ...
## $ 1990 : num 26.375 0.213 0.42 1.678 7.467 ...
## $ 1991 : num 26.046 0.188 0.405 1.312 7.182 ...
## $ 1992 : num 21.4426 0.0997 0.4007 0.7747 6.9121 ...
## $ 1993 : num 22.0008 0.0892 0.4309 0.7238 6.7361 ...
## $ 1994 : num 21.036 0.08 0.281 0.6 6.494 ...
## $ 1995 : num 20.7719 0.0727 0.7692 0.6545 6.6621 ...
## $ 1996 : num 20.318 0.066 0.712 0.637 7.065 ...
## $ 1997 : num 20.4268 0.0596 0.4892 0.4904 7.2397 ...
## $ 1998 : num 20.5877 0.0552 0.4714 0.5603 7.6608 ...
## $ 1999 : num 20.3116 0.0423 0.5741 0.9602 7.9755 ...
## $ 2000 : num 26.1949 0.0385 0.5804 0.9782 8.0193 ...
## $ 2001 : num 25.934 0.039 0.573 1.053 7.787 ...
## $ 2002 : num 25.6712 0.0487 0.7208 1.2295 7.5906 ...
## $ 2003 : num 26.4205 0.0518 0.498 1.4127 7.3158 ...
## $ 2004 : num 26.5173 0.0394 0.9962 1.3762 7.3586 ...
## $ 2005 : num 27.2007 0.0529 0.9797 1.4125 7.2999 ...
## $ 2006 : num 26.9483 0.0637 1.0989 1.3026 6.7462 ...
## $ 2007 : num 27.8956 0.0854 1.1978 1.3223 6.5195 ...
## $ 2008 : num 26.231 0.154 1.182 1.484 6.428 ...
## $ 2009 : num 25.916 0.242 1.232 1.496 6.122 ...
## $ 2010 : num 24.671 0.294 1.243 1.579 6.123 ...
## $ 2011 : num 24.506 0.412 1.253 1.804 5.867 ...
## $ 2012 : num 13.16 0.35 1.33 1.69 5.92 ...
## $ 2013 : num 8.351 0.316 1.255 1.749 5.901 ...
## $ 2014 : num 8.408 0.299 1.291 1.979 5.832 ...
## $ 2015 : logi NA NA NA NA NA NA ...
## $ 2016 : logi NA NA NA NA NA NA ...
## $ 2017 : logi NA NA NA NA NA NA ...
## $ 2018 : logi NA NA NA NA NA NA ...
## - attr(*, "spec")=
## .. cols(
## .. `Country Name` = col_character(),
## .. `Country Code` = col_character(),
## .. `Indicator Name` = col_character(),
## .. `Indicator Code` = col_character(),
## .. `1960` = col_double(),
## .. `1961` = col_double(),
## .. `1962` = col_double(),
## .. `1963` = col_double(),
## .. `1964` = col_double(),
## .. `1965` = col_double(),
## .. `1966` = col_double(),
## .. `1967` = col_double(),
## .. `1968` = col_double(),
## .. `1969` = col_double(),
## .. `1970` = col_double(),
## .. `1971` = col_double(),
## .. `1972` = col_double(),
## .. `1973` = col_double(),
## .. `1974` = col_double(),
## .. `1975` = col_double(),
## .. `1976` = col_double(),
## .. `1977` = col_double(),
## .. `1978` = col_double(),
## .. `1979` = col_double(),
## .. `1980` = col_double(),
## .. `1981` = col_double(),
## .. `1982` = col_double(),
## .. `1983` = col_double(),
## .. `1984` = col_double(),
## .. `1985` = col_double(),
## .. `1986` = col_double(),
## .. `1987` = col_double(),
## .. `1988` = col_double(),
## .. `1989` = col_double(),
## .. `1990` = col_double(),
## .. `1991` = col_double(),
## .. `1992` = col_double(),
## .. `1993` = col_double(),
## .. `1994` = col_double(),
## .. `1995` = col_double(),
## .. `1996` = col_double(),
## .. `1997` = col_double(),
## .. `1998` = col_double(),
## .. `1999` = col_double(),
## .. `2000` = col_double(),
## .. `2001` = col_double(),
## .. `2002` = col_double(),
## .. `2003` = col_double(),
## .. `2004` = col_double(),
## .. `2005` = col_double(),
## .. `2006` = col_double(),
## .. `2007` = col_double(),
## .. `2008` = col_double(),
## .. `2009` = col_double(),
## .. `2010` = col_double(),
## .. `2011` = col_double(),
## .. `2012` = col_double(),
## .. `2013` = col_double(),
## .. `2014` = col_double(),
## .. `2015` = col_logical(),
## .. `2016` = col_logical(),
## .. `2017` = col_logical(),
## .. `2018` = col_logical()
## .. )
Fix the column names
Here I convert the column names to camel case
names(df) <- names(df) %>% tolower() %>% str_replace(" ", "_")
Let’s take another look
The skimr package contains tools for summarising various data structures in R.
df %>% skim_to_wide()
| character |
country_code |
0 |
264 |
264 |
3 |
3 |
0 |
264 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| character |
country_name |
0 |
264 |
264 |
4 |
52 |
0 |
264 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| character |
indicator_code |
0 |
264 |
264 |
14 |
14 |
0 |
1 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| character |
indicator_name |
0 |
264 |
264 |
38 |
38 |
0 |
1 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| logical |
2015 |
264 |
0 |
264 |
NA |
NA |
NA |
NA |
NaN |
264 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| logical |
2016 |
264 |
0 |
264 |
NA |
NA |
NA |
NA |
NaN |
264 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| logical |
2017 |
264 |
0 |
264 |
NA |
NA |
NA |
NA |
NaN |
264 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| logical |
2018 |
264 |
0 |
264 |
NA |
NA |
NA |
NA |
NaN |
264 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| numeric |
1960 |
72 |
192 |
264 |
NA |
NA |
NA |
NA |
2.04 |
NA |
4.19 |
0.008 |
0.18 |
0.62 |
1.7 |
36.69 |
▇▁▁▁▁▁▁▁ |
| numeric |
1961 |
71 |
193 |
264 |
NA |
NA |
NA |
NA |
2.16 |
NA |
4.38 |
0.0079 |
0.18 |
0.65 |
1.75 |
36.58 |
▇▁▁▁▁▁▁▁ |
| numeric |
1962 |
69 |
195 |
264 |
NA |
NA |
NA |
NA |
2.25 |
NA |
4.72 |
0.0085 |
0.2 |
0.65 |
1.94 |
42.24 |
▇▁▁▁▁▁▁▁ |
| numeric |
1963 |
68 |
196 |
264 |
NA |
NA |
NA |
NA |
2.76 |
NA |
8.46 |
0.0094 |
0.2 |
0.65 |
1.72 |
99.46 |
▇▁▁▁▁▁▁▁ |
| numeric |
1964 |
61 |
203 |
264 |
NA |
NA |
NA |
NA |
2.91 |
NA |
8.49 |
0.012 |
0.22 |
0.77 |
2.02 |
92.86 |
▇▁▁▁▁▁▁▁ |
| numeric |
1965 |
61 |
203 |
264 |
NA |
NA |
NA |
NA |
3.03 |
NA |
8.7 |
0.012 |
0.23 |
0.7 |
2.19 |
85.46 |
▇▁▁▁▁▁▁▁ |
| numeric |
1966 |
61 |
203 |
264 |
NA |
NA |
NA |
NA |
3.04 |
NA |
8.05 |
0.013 |
0.25 |
0.75 |
2.46 |
78.63 |
▇▁▁▁▁▁▁▁ |
| numeric |
1967 |
61 |
203 |
264 |
NA |
NA |
NA |
NA |
3.11 |
NA |
7.26 |
0.012 |
0.25 |
0.8 |
2.92 |
77.51 |
▇▁▁▁▁▁▁▁ |
| numeric |
1968 |
61 |
203 |
264 |
NA |
NA |
NA |
NA |
3.31 |
NA |
7.18 |
-0.02 |
0.27 |
0.99 |
3.26 |
75.98 |
▇▁▁▁▁▁▁▁ |
| numeric |
1969 |
61 |
203 |
264 |
NA |
NA |
NA |
NA |
3.92 |
NA |
9.9 |
0.016 |
0.32 |
1.06 |
3.6 |
100.7 |
▇▁▁▁▁▁▁▁ |
| numeric |
1970 |
59 |
205 |
264 |
NA |
NA |
NA |
NA |
4.2 |
NA |
9.25 |
0.012 |
0.35 |
1 |
4.01 |
69.11 |
▇▁▁▁▁▁▁▁ |
| numeric |
1971 |
58 |
206 |
264 |
NA |
NA |
NA |
NA |
4.42 |
NA |
9.87 |
0.012 |
0.34 |
1.1 |
4.5 |
76.64 |
▇▁▁▁▁▁▁▁ |
| numeric |
1972 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
4.49 |
NA |
10.13 |
0.012 |
0.35 |
1.11 |
4.52 |
82.62 |
▇▁▁▁▁▁▁▁ |
| numeric |
1973 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
4.81 |
NA |
10.75 |
0.011 |
0.37 |
1.14 |
5.12 |
87.65 |
▇▁▁▁▁▁▁▁ |
| numeric |
1974 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
4.5 |
NA |
9.18 |
0.0097 |
0.37 |
1.23 |
4.64 |
68.23 |
▇▁▁▁▁▁▁▁ |
| numeric |
1975 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
4.37 |
NA |
8.45 |
0.0097 |
0.38 |
1.29 |
4.85 |
66.64 |
▇▁▁▁▁▁▁▁ |
| numeric |
1976 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
4.36 |
NA |
8.05 |
0.0099 |
0.36 |
1.36 |
5.17 |
61.29 |
▇▁▁▁▁▁▁▁ |
| numeric |
1977 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
4.49 |
NA |
7.99 |
0.01 |
0.39 |
1.44 |
5.29 |
54.41 |
▇▂▁▁▁▁▁▁ |
| numeric |
1978 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
4.51 |
NA |
8.03 |
0.0074 |
0.4 |
1.52 |
5.75 |
54.83 |
▇▂▁▁▁▁▁▁ |
| numeric |
1979 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
4.56 |
NA |
7.95 |
0.0043 |
0.44 |
1.58 |
5.49 |
69.94 |
▇▁▁▁▁▁▁▁ |
| numeric |
1980 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
4.46 |
NA |
7.41 |
0.036 |
0.45 |
1.53 |
5.49 |
58.53 |
▇▂▁▁▁▁▁▁ |
| numeric |
1981 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
3.99 |
NA |
6.19 |
0.03 |
0.47 |
1.52 |
5.31 |
51.83 |
▇▂▁▁▁▁▁▁ |
| numeric |
1982 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
3.87 |
NA |
5.78 |
0.028 |
0.45 |
1.48 |
5.38 |
44.54 |
▇▂▁▁▁▁▁▁ |
| numeric |
1983 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
3.73 |
NA |
5.29 |
0.031 |
0.45 |
1.37 |
5.41 |
36.41 |
▇▂▁▁▁▁▁▁ |
| numeric |
1984 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
3.82 |
NA |
5.56 |
0.041 |
0.48 |
1.45 |
5.26 |
36.12 |
▇▂▁▁▁▁▁▁ |
| numeric |
1985 |
56 |
208 |
264 |
NA |
NA |
NA |
NA |
3.92 |
NA |
5.6 |
0.035 |
0.47 |
1.54 |
5.56 |
35.89 |
▇▁▁▁▁▁▁▁ |
| numeric |
1986 |
55 |
209 |
264 |
NA |
NA |
NA |
NA |
3.91 |
NA |
5.53 |
0.036 |
0.44 |
1.55 |
4.98 |
33.41 |
▇▂▁▁▁▁▁▁ |
| numeric |
1987 |
55 |
209 |
264 |
NA |
NA |
NA |
NA |
3.94 |
NA |
5.33 |
0.037 |
0.48 |
1.64 |
5.39 |
30.56 |
▇▂▁▁▁▁▁▁ |
| numeric |
1988 |
55 |
209 |
264 |
NA |
NA |
NA |
NA |
4.08 |
NA |
5.48 |
0.012 |
0.51 |
1.76 |
5.8 |
29.21 |
▇▂▁▁▁▁▁▁ |
| numeric |
1989 |
55 |
209 |
264 |
NA |
NA |
NA |
NA |
4.21 |
NA |
5.73 |
0.018 |
0.5 |
1.64 |
5.84 |
31.03 |
▇▂▁▁▁▁▁▁ |
| numeric |
1990 |
49 |
215 |
264 |
NA |
NA |
NA |
NA |
4.08 |
NA |
5.63 |
0.024 |
0.46 |
1.67 |
5.91 |
27.96 |
▇▂▂▁▁▁▁▁ |
| numeric |
1991 |
47 |
217 |
264 |
NA |
NA |
NA |
NA |
4.12 |
NA |
5.68 |
0.011 |
0.45 |
1.86 |
5.99 |
36.32 |
▇▂▁▁▁▁▁▁ |
| numeric |
1992 |
23 |
241 |
264 |
NA |
NA |
NA |
NA |
4.48 |
NA |
6.02 |
0.013 |
0.57 |
2.28 |
6.47 |
54.09 |
▇▂▁▁▁▁▁▁ |
| numeric |
1993 |
23 |
241 |
264 |
NA |
NA |
NA |
NA |
4.5 |
NA |
6.34 |
0.014 |
0.53 |
2.24 |
6.66 |
61.25 |
▇▂▁▁▁▁▁▁ |
| numeric |
1994 |
22 |
242 |
264 |
NA |
NA |
NA |
NA |
4.42 |
NA |
6.24 |
0.015 |
0.57 |
2.19 |
6.45 |
59.6 |
▇▂▁▁▁▁▁▁ |
| numeric |
1995 |
21 |
243 |
264 |
NA |
NA |
NA |
NA |
4.47 |
NA |
6.38 |
0.016 |
0.58 |
2.32 |
6.48 |
61.91 |
▇▂▁▁▁▁▁▁ |
| numeric |
1996 |
21 |
243 |
264 |
NA |
NA |
NA |
NA |
4.49 |
NA |
6.25 |
0.017 |
0.62 |
2.4 |
6.76 |
61.84 |
▇▂▁▁▁▁▁▁ |
| numeric |
1997 |
20 |
244 |
264 |
NA |
NA |
NA |
NA |
4.49 |
NA |
6.56 |
0.019 |
0.68 |
2.27 |
6.58 |
70.14 |
▇▁▁▁▁▁▁▁ |
| numeric |
1998 |
19 |
245 |
264 |
NA |
NA |
NA |
NA |
4.48 |
NA |
6.18 |
0.019 |
0.7 |
2.25 |
6.55 |
58.87 |
▇▂▁▁▁▁▁▁ |
| numeric |
1999 |
19 |
245 |
264 |
NA |
NA |
NA |
NA |
4.45 |
NA |
5.96 |
0.02 |
0.74 |
2.26 |
6.69 |
55.16 |
▇▂▁▁▁▁▁▁ |
| numeric |
2000 |
19 |
245 |
264 |
NA |
NA |
NA |
NA |
4.58 |
NA |
6.37 |
0.017 |
0.74 |
2.34 |
6.61 |
58.64 |
▇▂▁▁▁▁▁▁ |
| numeric |
2001 |
19 |
245 |
264 |
NA |
NA |
NA |
NA |
4.63 |
NA |
6.52 |
0.017 |
0.76 |
2.44 |
6.92 |
67.11 |
▇▁▁▁▁▁▁▁ |
| numeric |
2002 |
18 |
246 |
264 |
NA |
NA |
NA |
NA |
4.6 |
NA |
6.31 |
0.019 |
0.76 |
2.5 |
6.95 |
63.35 |
▇▂▁▁▁▁▁▁ |
| numeric |
2003 |
18 |
246 |
264 |
NA |
NA |
NA |
NA |
4.73 |
NA |
6.36 |
0.019 |
0.8 |
2.63 |
7.24 |
60.3 |
▇▂▁▁▁▁▁▁ |
| numeric |
2004 |
18 |
246 |
264 |
NA |
NA |
NA |
NA |
4.78 |
NA |
6.29 |
0.023 |
0.84 |
2.66 |
7.13 |
56.59 |
▇▂▁▁▁▁▁▁ |
| numeric |
2005 |
17 |
247 |
264 |
NA |
NA |
NA |
NA |
4.82 |
NA |
6.41 |
0.021 |
0.86 |
2.77 |
7.03 |
58.92 |
▇▂▁▁▁▁▁▁ |
| numeric |
2006 |
16 |
248 |
264 |
NA |
NA |
NA |
NA |
4.9 |
NA |
6.56 |
0.024 |
0.8 |
2.92 |
7.04 |
62.82 |
▇▂▁▁▁▁▁▁ |
| numeric |
2007 |
15 |
249 |
264 |
NA |
NA |
NA |
NA |
4.93 |
NA |
6.39 |
0.024 |
0.89 |
2.89 |
6.93 |
53.19 |
▇▂▁▁▁▁▁▁ |
| numeric |
2008 |
15 |
249 |
264 |
NA |
NA |
NA |
NA |
4.94 |
NA |
6.18 |
0.023 |
0.82 |
3.03 |
7.01 |
46.67 |
▇▂▁▁▁▁▁▁ |
| numeric |
2009 |
15 |
249 |
264 |
NA |
NA |
NA |
NA |
4.72 |
NA |
5.86 |
0.022 |
0.83 |
2.95 |
6.32 |
43.51 |
▇▃▁▁▁▁▁▁ |
| numeric |
2010 |
15 |
249 |
264 |
NA |
NA |
NA |
NA |
4.84 |
NA |
5.89 |
0.024 |
0.82 |
2.93 |
6.64 |
40.74 |
▇▂▁▁▁▁▁▁ |
| numeric |
2011 |
15 |
249 |
264 |
NA |
NA |
NA |
NA |
4.81 |
NA |
5.82 |
0.027 |
0.84 |
2.93 |
6.72 |
41.21 |
▇▃▁▁▁▁▁▁ |
| numeric |
2012 |
13 |
251 |
264 |
NA |
NA |
NA |
NA |
4.95 |
NA |
6.19 |
0.03 |
0.83 |
3.03 |
6.66 |
44.62 |
▇▃▁▁▁▁▁▁ |
| numeric |
2013 |
13 |
251 |
264 |
NA |
NA |
NA |
NA |
4.86 |
NA |
5.86 |
0.03 |
0.85 |
3.01 |
6.71 |
37.78 |
▇▃▁▁▁▁▁▁ |
| numeric |
2014 |
14 |
250 |
264 |
NA |
NA |
NA |
NA |
4.87 |
NA |
6.1 |
0.044 |
0.88 |
3.15 |
6.37 |
45.42 |
▇▂▁▁▁▁▁▁ |
Is anything missing
library(mi)
df %>% as.data.frame() %>% missing_data.frame() %>% image()

Those aren’t Countries!
There are quite a few regional and economic groups included as countries. I decided to remove these and keep just the countries and the value for the world as a whole. The datatable made it easier to browse through the data and find what I wanted to remove.
# Lookup table mapping country codes to names
country_names <- df %>%
pull(country_name) %>%
set_names(df %>% pull(country_code)) %>%
sort()
df %>%
select(country_code, country_name) %>%
unique() %>%
datatable()
non_countries <- c(
"ARB", "CEB", "CSS", "EAP", "EAR", "EAS", "ECA", "ECS", "EMU",
"EUU", "FCS", "HIC", "HPC", "IBD", "IBT", "IDA", "IDB", "IDX", "INX",
"LAC", "LCN", "LDC", "LIC", "LMC", "LMY", "LTE", "MEA", "MIC", "MNA",
"NAC", "OED", "OSS", "PRE", "PSS", "PST", "SAS", "SSA", "SSF", "SST",
"TEA", "TEC", "TLA", "TMN", "TSA", "TSS", "UMC"
)
# remove the unwated rows
df <- df %>% filter(!(country_code %in% non_countries))
Reshape the table
Here I create a long data frame with columns indicating the country, year, and carbon dioxide emissions
long_df <- df %>%
select(-indicator_code, -indicator_name, -country_name) %>%
gather(key = "year", value = "co2_per_cap", -country_code) %>%
mutate(year = as.integer(year))
skim_to_wide(long_df)
| character |
country_code |
0 |
12862 |
12862 |
3 |
3 |
0 |
218 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| integer |
year |
0 |
12862 |
12862 |
NA |
NA |
NA |
NA |
1989 |
17.03 |
1960 |
1974 |
1989 |
2004 |
2018 |
▇▇▇▇▇▇▇▇ |
| numeric |
co2_per_cap |
2957 |
9905 |
12862 |
NA |
NA |
NA |
NA |
4.4 |
7.43 |
-0.02 |
0.37 |
1.61 |
6.01 |
100.7 |
▇▁▁▁▁▁▁▁ |
Clean up the missing data
I removed the years from 2015, which contained no data. I also created a separate data frame containg only the countries with complete records since 1960.
# 2015 and on don't have any data. rather than remove them by hand, This does so programatically
good_years <- long_df %>%
group_by(year) %>%
summarise(has_data = any(!is.na(co2_per_cap))) %>%
filter(has_data) %>%
pull(year)
long_df <- long_df %>% filter(year %in% good_years)
skim_to_wide(long_df)
| character |
country_code |
0 |
11990 |
11990 |
3 |
3 |
0 |
218 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| integer |
year |
0 |
11990 |
11990 |
NA |
NA |
NA |
NA |
1987 |
15.88 |
1960 |
1973 |
1987 |
2001 |
2014 |
▇▇▇▇▇▇▇▇ |
| numeric |
co2_per_cap |
2085 |
9905 |
11990 |
NA |
NA |
NA |
NA |
4.4 |
7.43 |
-0.02 |
0.37 |
1.61 |
6.01 |
100.7 |
▇▁▁▁▁▁▁▁ |
# create a dataframe with every available record
long_all <- long_df %>% filter(is.na(co2_per_cap))
# Choose only the countries that have complete records.
complete_countries <- long_df %>%
group_by(country_code) %>%
summarise(all_years = !any(is.na(co2_per_cap))) %>%
filter(all_years) %>%
pull(country_code)
long_df <- long_df %>%
filter(country_code %in% complete_countries)
skim_to_wide(long_df)
| character |
country_code |
0 |
8250 |
8250 |
3 |
3 |
0 |
150 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| integer |
year |
0 |
8250 |
8250 |
NA |
NA |
NA |
NA |
1987 |
15.88 |
1960 |
1973 |
1987 |
2001 |
2014 |
▇▇▇▇▇▇▇▇ |
| numeric |
co2_per_cap |
0 |
8250 |
8250 |
NA |
NA |
NA |
NA |
4.36 |
7.55 |
-0.02 |
0.35 |
1.58 |
5.93 |
100.7 |
▇▁▁▁▁▁▁▁ |
long_df %>% datatable()
Time to take a look
Here I’ll take a look at the countries which have a complete record from 1960 to 2014 ### Everything
ggplot(data = long_df) +
geom_line(aes(x = year, y = co2_per_cap, group = country_code))

This is pretty crowded and hard to see much on.
Countries with the Highest Emissions
These countries had the highest average over the course of the dataset
long_df %>%
group_by(country_code) %>%
summarise(co2_avg = mean(co2_per_cap)) %>%
arrange(desc(co2_avg)) %>%
head(10) %>%
ggplot() +
geom_col(aes(x = country_code, y = co2_avg)) +
scale_x_discrete(name = "Country", labels = function(x) country_names[x])

A Closer Look at the Top Counries
long_df %>%
group_by(country_code) %>%
summarise(co2_avg = mean(co2_per_cap)) %>%
arrange(desc(co2_avg)) %>%
head(10) %>%
pull(country_code) %>%
lapply(
function(x) ggplot(filter(long_df, country_code == x), aes(year, co2_per_cap)) +
geom_line() +
labs(title = country_names[x]) +
coord_cartesian(ylim = c(0, 100))
) %>%
walk(print)

There isn’t a consistent pattern amoung the countries with the highest per capita carbon dioxide emissions. Some are highly industrialized Western nations, which tend to have stable but high emissions. The others tend to be less stable over time, with spikes of extreme emission levels. Qatar is unique in that it’s baseline is consistently higher than anyone else’s and it has spikes to even more extreme levels